{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "import random\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Observation space: Discrete(16)\n",
      "Action space: Discrete(4)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "gym.spaces.discrete.Discrete"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env_name = \"CartPole-v1\"\n",
    "env_name = \"MountainCar-v0\"\n",
    "env_name = \"MountainCarContinuous-v0\"\n",
    "env_name = \"Acrobot-v1\"\n",
    "env_name = \"Pendulum-v0\"\n",
    "env_name = \"FrozenLake-v0\"\n",
    "env = gym.make(env_name)\n",
    "print(\"Observation space:\", env.observation_space)\n",
    "print(\"Action space:\", env.action_space)\n",
    "type(env.action_space)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Agent():\n",
    "    def __init__(self, env):\n",
    "        self.is_discrete = \\\n",
    "            type(env.action_space) == gym.spaces.discrete.Discrete\n",
    "        \n",
    "        if self.is_discrete:\n",
    "            self.action_size = env.action_space.n\n",
    "            print(\"Action size:\", self.action_size)\n",
    "        else:\n",
    "            self.action_low = env.action_space.low\n",
    "            self.action_high = env.action_space.high\n",
    "            self.action_shape = env.action_space.shape\n",
    "            print(\"Action range:\", self.action_low, self.action_high)\n",
    "        \n",
    "    def get_action(self, state):\n",
    "        if self.is_discrete:\n",
    "            action = random.choice(range(self.action_size))\n",
    "        else:\n",
    "            action = np.random.uniform(self.action_low,\n",
    "                                       self.action_high,\n",
    "                                       self.action_shape)\n",
    "        return action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Action size: 4\n",
      "  (Up)\n",
      "\u001b[41mS\u001b[0mFFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "  (Up)\n",
      "\u001b[41mS\u001b[0mFFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "  (Right)\n",
      "S\u001b[41mF\u001b[0mFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "  (Down)\n",
      "SF\u001b[41mF\u001b[0mF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "  (Up)\n",
      "SF\u001b[41mF\u001b[0mF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FH\u001b[41mF\u001b[0mH\n",
      "FFFH\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FF\u001b[41mF\u001b[0mH\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Left)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Down)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Up)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n",
      "  (Right)\n",
      "SFFF\n",
      "FHFH\n",
      "FFF\u001b[41mH\u001b[0m\n",
      "HFFG\n"
     ]
    }
   ],
   "source": [
    "agent = Agent(env)\n",
    "state = env.reset()\n",
    "\n",
    "for _ in range(200):\n",
    "#     action = env.action_space.sample()\n",
    "    action = agent.get_action(state)\n",
    "    state, reward, done, info = env.step(action)\n",
    "    env.render()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
